NOTES ON STATISTICS, PROBABILITY and MATHEMATICS


Graphs:


For parameter changes and options of the igraph() package, this is a reference.

To generate a random graph with the package igraph() the call is as follows:


GENERATING RANDOM UNDIRECTED GRAPHS:

library(igraph)
set.seed(5)
g = sample_gnp(30, 0.1, directed=F)
plot(g, 
     vertex.size=20, 
     vertex.color ='black',
     vertex.label.cex=1.2,
     vertex.label.color='white',
     edge.width=2,
     edge.color='red')

There number of nodes or vertices is the first entry (\(30\)), and the probability of having an edge or connection between any two vertices is \(0.1.\) The graph is un-directed.

The plotting is rather random, but it can be controlled as follows:

plot(g, 
     vertex.size=20, 
     vertex.color ='blue',
     vertex.label.cex=1.2,
     vertex.label.color='white',
     edge.width=2,
     edge.color=rgb(.2,.9,.3,.8),
     layout=layout_in_circle(g))

It is possible to take a random walk through the graph as follows:

# Graph name, starting point, number of steps, what to do if stuck:
random_walk(g, 9, 10, stuck='return')
## + 10/30 vertices, from 1586456:
##  [1]  9 29 22  4 15 20 25 17 16 17

The graph can be saved with the command write.graph(g, file='Graph.txt', format="edgelist").


Analyze a Network from Existing Data:

From this presentation the scenes in which two given characters in Les Mis appear together is in a file called EL, whereas the number of edges coming off each character is in NL:

EL = read.csv('https://raw.githubusercontent.com/RInterested/DATASETS/gh-pages/lesmis-el.csv')
head(EL)
##     Character1  Character2 Weight
## 1 Gillenormand JeanValjean      2
## 2      Zephine   Listolier      3
## 3         Joly     Feuilly      5
## 4       Brevet       Judge      2
## 5   Bamatabois JeanValjean      2
## 6     Gavroche JeanValjean      1
NL = read.csv('https://raw.githubusercontent.com/RInterested/DATASETS/gh-pages/lesmis-nl.csv')
head(NL)
##     Character1 Size
## 1 Gillenormand   29
## 2      Zephine   24
## 3         Joly   43
## 4       Brevet   11
## 5   Bamatabois   11
## 6     Gavroche   56
gr = graph_from_data_frame(d = EL, vertices = NL, directed=F)
plot(gr, vertex.label.cex=2, vertex.size=5, vertex.color="orange",
     edge.color="orange", vertex.frame.color="#ffffff",
     vertex.label.color="black",
     layout=layout_in_circle(gr))

# Or

plot(gr, vertex.label.cex=1.5, vertex.size=5, vertex.color="orange",
     edge.color="orange", vertex.frame.color="#ffffff",
     vertex.label.color="black")

plot(minimum.spanning.tree(gr), 
     vertex.label.cex = ifelse(vertex.attributes(gr)$name%in%c('JeanValjean','Cosette','Marius','Javert', 'Fantine', 'Gavroche'), 4, 2),
          vertex.label.color = ifelse(vertex.attributes(gr)$name%in%c('JeanValjean','Cosette','Marius','Javert', 'Fantine', 'Gavroche'), 'red', NA),
     vertex.size=5, 
     vertex.color="yellow",
     vertex.frame.color='yellow',
     edge.color="firebrick", 
     vertex.label.color="black")

Taking a look at the network:

gr
## IGRAPH 1705e45 UN-- 77 508 -- 
## + attr: name (v/c), Size (v/n), Weight (e/n)
## + edges from 1705e45 (vertex names):
##  [1] Gillenormand   --JeanValjean                          
##  [2] Zephine        --Listolier                            
##  [3] Joly           --Feuilly                              
##  [4] Brevet         --Judge                                
##  [5] Bamatabois     --JeanValjean                          
##  [6] Gavroche       --JeanValjean                          
##  [7] MadameHucheloup--Courfeyrac                           
##  [8] Gavroche       --Javert                               
## + ... omitted several edges

we see there are \(77\) nodes, \(508\) edges, and the types of links.

Also we can analyze the network to see who is more of an influencer:

sort(degree(gr), decreasing=T)[1:10]
##           JeanValjean              Gavroche                Marius 
##                    72                    44                    38 
##                Javert            Thenardier               Fantine 
##                    34                    32                    30 
##              Enjolras            Courfeyrac LAigleDeMeauxBossuet  
##                    30                    26                    26 
##                  Joly 
##                    24

We can look at edge.attributes(gr) or vertex.attributes(gr) or visualize the data as a matrix gr[].


Graph Cliques and Ramsey Numbers:

Here is a naive attempt at getting extremely lucky and being able to tighten the bounds for the Ramsey number \(R(5,5)\), which is currently bounded between \([43, 49]\) first by randomly generating red edges using a biased coin flip:

set.seed(0)
colors = c("darkred","blue4")
vcol = 'gold'
vlab = 'white'

# 5 5   [43, 49]    Exoo 1989b, McKay and Radziszowski 1995
n = 43
# R(5,5)
cl = 5
m = matrix(rbinom(n^2,1,.7),nrow=n)
m[lower.tri(m)] = 0  # It is an undirected graft
diag(m) = 0 # Avoiding self edges
g = graph_from_adjacency_matrix(m, mode='undirected')


plot(g, layout = layout_in_circle(g),
     edge.curved=0, 
     edge.color=colors[1],
     vertex.size=10, 
     vertex.color=vcol,
     vertex.label.color='red',
     vertex.label.cex=2)

The blue edges are similarly created as:

m.bar = matrix(rep(0, nrow(m)^2), nrow=nrow(m))
for(i in 1:nrow(m)){for(j in 1:ncol(m)){m.bar[i,j] <- ifelse(m[i,j]==0,1,0)}}
m.bar[lower.tri(m.bar)] = 0  
diag(m.bar) = 0
g.bar = graph_from_adjacency_matrix(m.bar, mode='undirected')
 
plot(g.bar, 
     layout=layout_in_circle(g.bar), 
     edge.curved=0, 
     edge.color=colors[2],
     vertex.color=vcol,
     vertex.size=10,
     vertex.label.cex=2,
     vertex.label.color="red")

These two colorations can now be merged into one complete graph:

plot(g, layout = layout_in_circle(g),
     edge.curved=0, 
     edge.color=colors[1],
     vertex.size=10, 
     vertex.color=vcol,
     vertex.label.color='red',
     vertex.label.cex=2)
plot(g.bar, 
     layout=layout_in_circle(g.bar), 
     edge.curved=0, 
     edge.width=2,
     edge.color=colors[2],
     vertex.color=vcol,
     vertex.size=10,
     vertex.label.cex=2,
     vertex.label.color="red",
     add=T)

We can check it is complete remembering that the edges of a complete graph with \(n\) vertices are \(n (n-1)/2:\)

u = union(g,g.bar)

print(paste("Is the graph complete? ", length(E(u))==n*(n-1)/2))
## [1] "Is the graph complete?  TRUE"

How many red and blue cliques of size \(5\) are there:

red.clust = cliques(g, min=cl, max=cl)
print(paste("How many red clicks of size", cl, "are there?", length(red.clust),"."))
## [1] "How many red clicks of size 5 are there? 22360 ."
blue.clust = cliques(g.bar, min=cl, max=cl)
print(paste("How many blue clicks of size", cl, "are there?", length(blue.clust),'.'))
## [1] "How many blue clicks of size 5 are there? 1 ."

And we can look for the largest cliques:

print(paste("The maximum red clique size is", clique_num(g)))
## [1] "The maximum red clique size is 11"
print(paste("The maximum blue clique size is", clique_num(g.bar)))
## [1] "The maximum blue clique size is 5"

Home Page

NOTE: These are tentative notes on different topics for personal use - expect mistakes and misunderstandings.